* Begin log file
* Close any log still open under the name "sublog"; capture swallows the error
* raised when no such log exists
capture log close sublog
* Open a plain-text log named "sublog", overwriting the file from earlier runs
log using "$projdir/log/5_mcd_synth.txt", name(sublog) text replace

*-------------------------------------------------------------------------------
* Description: Estimate the uncorrected, bias-corrected, and pandemic-corrected
* synthetic control results for individual CA and NY counties in the MCD subsample
* Author: Denis Sosinskiy
*
* Updated: August 4, 2023
*-------------------------------------------------------------------------------
* Start from an empty dataset
clear

* Create or clear necessary folders
* y (outcome) and n (NAICS code) name the output folders used by the QCEW sections
local y = "avg_wkly_wage"
local n = 722513
foreach loc in California NY {
	* Output folder for this location; its "corrected" subfolder holds the
	* covid- and bias-corrected results
	local outdir "$projdir/dta/analysis/qcew/`y'/naics`n'/`loc'_subsample"

	* Ensure necessary folders exist. mkdir creates one level at a time, and
	* capture ignores the error raised when a folder is already there
	capture mkdir "$projdir/dta/analysis/qcew"
	capture mkdir "$projdir/dta/analysis/qcew/`y'"
	capture mkdir "$projdir/dta/analysis/qcew/`y'/naics`n'"
	capture mkdir "`outdir'"
	capture mkdir "`outdir'/corrected"

	* Clear the old .dta files (if any) from the output folder and from its
	* "corrected" subfolder, so results from earlier runs cannot be picked up
	di "Erasing previously saved outcome data"
	di "..."
	di
	foreach folder in "`outdir'" "`outdir'/corrected" {
		local filelist: dir "`folder'" files "*.dta"
		di "Filelist:" `filelist'
		foreach f of local filelist {
			qui erase "`folder'/`f'"
		}
	}
}

*---------------------------------------------------------------------------
* California
*---------------------------------------------------------------------------

* Scratch files used throughout the California QCEW section
tempfile core core0 core1 core2 core3 core4

* Sample window and treatment quarter, expressed as Stata quarterly dates
* (199 = 2009q4, 218 = 2014q3, 251 = 2022q4)
local start_tm = 199
local end_tm = 251 
local trp = 218
local tot_tm = `end_tm' - `start_tm' + 1

* Text label for the last quarter of the window
if `end_tm' == 251 {
	local endqtrlab "2022Q4"
}
else {
	local endqtrlab "2019Q4"
}

* The user-written -distinct- command is needed below; capture lets the script
* continue when the install step fails or is unnecessary
capture ssc install distinct

* Read the county-level analysis panel
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear

* Give the key variables the names used in the rest of this script
qui rename quarterly_date tm
qui rename earnings demean_lnearn
qui rename lnemp demean_lnemp
qui rename ind naics
format tm %tq

* County identifier built from the state and county codes
qui gen cty_fips = 1000*statefips + countyfips

* Discard rows without an identifier, or with missing or zero wage/employment
qui drop if mi(cty_fips) | mi(avg_wkly_wage) | mi(employment) | avg_wkly_wage == 0 | employment == 0

* Sanity checks: nonzero first, then nonmissing
foreach v in cty_fips avg_wkly_wage employment {
	assert `v'
}
foreach v in avg_wkly_wage employment {
	assert !mi(`v')
}

* Map the older restaurant code 722211 onto 722513 for years up to and including 2011
qui replace naics = 722513 if naics == 722211 & year <= 2011

* Only NAICS 10, 722 and 722513 are used
qui keep if inlist(naics, 10, 722, 722513)

* Order the panel and cut it to the sample window
sort cty_fips tm naics
keep if inrange(tm, `start_tm', `end_tm')

* Identify treated and donor counties
* Treated counties are those with state FIPS 6 (California); trctylist is the same
* list comma-separated so it can be passed to inlist()
levelsof cty_fips if statefips == 6, local(trcty)
local trctylist = subinstr("`trcty'", " ", ",",.)
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Generate donor variable
* donor = 1 for every county outside the treated list, 0 for treated counties
qui gen donor = !inlist(cty_fips, `trctylist')

* Restrict to donor counties with no min wage changes (and the treated counties)
* A county whose loc_max_mw never varies has max == min
* (presumably loc_max_mw is the binding local minimum wage -- confirm in the build step)
qui egen double max_loc_max = max(loc_max_mw), by(cty_fips)
qui egen double min_loc_max = min(loc_max_mw), by(cty_fips)
* x flags NAICS 722 rows with min_emp of at least 5000; empkeep spreads that flag
* to every row of the county (min_emp is defined upstream and not visible here)
qui gen x = (naics == 722 & min_emp >= 5000)
* 13121 is Fulton County, GA, which the New York section notes has an issue in
* the raw earnings data
qui drop if cty_fips == 13121
bysort cty_fips: egen empkeep = max(x)
qui keep if (max_loc_max == min_loc_max | donor == 0) & empkeep == 1
* When the window runs past 239 (2019q4), counties in state FIPS 25 and 51
* (Massachusetts and Virginia) are dropped as well
* NOTE(review): the reason is not stated in this file -- confirm
if `end_tm' > 239 {
	qui drop if inlist(floor(cty_fips/1000), 25, 51)
}

* Restrict to counties in MCD data
* keep(3) retains only counties present in both the panel and the subsample file
sort cty_fips
merge m:1 cty_fips using "$projdir/dta/build/cln/mcd_cty_subsample.dta", nogen keep(3)

* Save tempfile
qui compress
qui save "`core'", replace

* Make county change adjustments and get mean of avg_wkly_wage over remaining counties
* The adjustment do-file is external and not visible here (presumably it recodes
* cty_fips so that affected counties can be combined -- confirm). The collapse is an
* employment-weighted mean; donor, year and qtr are averaged along with the wage
qui do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
qui collapse (mean) avg_wkly_wage donor year qtr [aw=employment], by(cty_fips tm naics)
* The averaged donor flag must still be exactly 0 or 1 in every cell
assert inlist(donor, 0, 1)

* Save tempfile
qui save "`core2'", replace

* Reload the data
qui use "`core'", clear

* Make county change adjustments and get sum of employment over remaining counties
qui do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
qui collapse (sum) employment pop10, by(cty_fips tm naics)

* Merge
* Combine the summed employment/pop10 with the weighted-mean wage built above
qui merge 1:1 cty_fips tm naics using "`core2'", nogen norep

* Keep only a balanced panel over periods of interest
* ct counts the quarters observed per county-industry; only complete series stay
qui keep if inrange(tm, `start_tm', `end_tm')	
bysort cty_fips naics: gen ct = _N
qui keep if ct == `tot_tm'

* Merge with the LAUS data
* County-year merge, matched rows only (presumably the source of unr, which is
* kept below -- confirm)
qui merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogen norep keep(3)

* Merge with the Covid-impact index
* The panel is parked in core while the index file is reshaped into core2;
* covid_index is the simple average of the workplace and retail variables
qui save "`core'", replace
qui use "$projdir/dta/build/cln/covid_index.dta", clear
qui rename (countyfips) (cty_fips)
qui gen covid_index = (workplace + retail)/2
qui keep cty_fips covid_index
qui save "`core2'", replace
qui use "`core'", clear
merge m:1 cty_fips using "`core2'", nogen norep keep(3)

* Observe overall employment in each period
* emp10 copies the NAICS 10 employment of the county-quarter onto every industry
* row, after which the NAICS 10 rows themselves are dropped
qui gen x = employment if naics == 10
bysort cty_fips tm: egen emp10 = max(x)
qui drop x
qui drop if naics == 10

* Keep variables of interest	
qui keep cty_fips tm naics avg_wkly_wage employment emp10 pop10 unr covid_index donor

* Save
qui save "`core0'", replace

* Build the covid- and bias-corrected outcomes separately for NAICS 722 and 722513
foreach n in 722 722513 {

	* Start from the cleaned panel and keep the industry at hand
	qui use "`core0'", clear
	qui keep if naics == `n'

	* Index each series to 100 in quarter 217 (2014q2), county by county
	foreach v in employment avg_wkly_wage emp10 {
		qui gen base_obs = `v' if tm == 217
		bysort cty_fips: egen base_lvl = max(base_obs)
		qui replace `v' = 100*`v'/base_lvl
		qui drop base_obs base_lvl
	}

	* Spread the quarter 199-207 values of each series into county-constant
	* variables (one per quarter) and collect their names as regression predictors.
	* Each pair below is "source variable" followed by "name stub"
	local predvars ""
	forval t = 199/207 {
		foreach pair in "employment emp" "avg_wkly_wage earn" "emp10 emp10" {
			gettoken src rest : pair
			gettoken stub : rest
			qui gen qval = `src' if tm == `t'
			bysort cty_fips: egen `stub'_`t' = max(qval)
			qui drop qval
			local predvars "`predvars' `stub'_`t'"
		}
	}

	* Add the county means over quarters 199-207 as further predictors
	foreach v in employment avg_wkly_wage emp10 unr {
		bysort cty_fips: egen pre_avg = mean(`v') if inrange(tm, 199, 207)
		bysort cty_fips: egen mean_`v' = max(pre_avg)
		qui drop pre_avg
		local predvars "`predvars' mean_`v'"
	}
	save "`core1'", replace

	* "Covid- and bias-correct" the outcomes: quarter by quarter, regress the
	* outcome on the predictors and the covid index among donor counties
	* (weighted by pop10), and store the residual for every county as p_<outcome>
	qui levelsof tm, local(quarters)
	foreach v in employment avg_wkly_wage {
		qui gen p_`v' = .
		foreach t of local quarters {
			qui reg `v' `predvars' covid_index [aw=pop10] if tm == `t' & donor == 1
			qui predict resid_t, resid
			qui replace p_`v' = resid_t if tm == `t'
			qui drop resid_t
		}
	}

	* Drop the predictor columns, keeping only what later steps read
	qui keep cty_fips tm naics employment avg_wkly_wage p_* emp10 pop10 unr donor

	* Tidy up; on the second pass stack the NAICS 722 rows saved on the first pass
	* so that core ends up holding both industries
	qui compress
	qui sort cty_fips tm
	if `n' == 722513 {
		qui append using "`core'"
	}
	qui save "`core'", replace
}

* Get the synthetic control weights for each treated county
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)

* For every treated (California) county: run allsynth4 on fast-food average weekly
* wages against the donor pool, then use the estimated donor weights to rebuild a
* synthetic series for the covid- and bias-corrected outcome and save the gap.
* Outputs: <...>/California_subsample/tr_<fips>.dta (allsynth4 keep file) and
*          <...>/California_subsample/corrected/tr_<fips>.dta (corrected gap)
foreach c of local trcty {
	local y = "avg_wkly_wage" 
	local n = 722513 
	if (`n' == 722513 & `c' != 6079) { // San Luis Obispo doesn't have a complete panel for fast food

		* Re-load the data
		qui use "`core'", clear

		* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
		qui keep if naics == `n'
		qui keep if cty_fips == `c' | donor == 1
		bysort cty_fips: gen N = _N
		qui keep if N == `tot_tm'
		drop N

		* Report how many donors and treated counties survive the balance check
		qui distinct cty_fips if donor == 1
		local donorct = r(ndistinct)
		qui distinct cty_fips if inlist(cty_fips, `trctylist')
		di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"

		* Create population-based weights for the averaging
		* NOTE(review): x, popwt, treated and trp (below) are not referenced again in
		* this script; they are kept in memory because allsynth4 is not visible here
		qui gen x = pop10
		bysort cty_fips: egen popwt = max(x)

		* Create the list of pre-treatment outcome period predictors:
		* one term per quarter from start_tm to predend for the outcome and for
		* emp10, plus the emp10 average over that whole range
		local predend = 207
		local ypreds
		local emp10preds
		forval t = `start_tm'/`predend' {
			local ypreds "`ypreds' `y'(`t')"
			local emp10preds "`emp10preds' emp10(`t')"
		}
		local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
		local normemp "emp10"

		* tsset
		format tm %tq
		tsset cty_fips tm, quarterly
		qui gen treated = (donor == 0) 
		qui gen trp = `trp'*treated

		#delimit ;
			allsynth4
				`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
				trunit(`c') trperiod(`trp') 
				bcorrect(merge) 
				transform(`y' `normemp', normalize)
				pvalues(rmspe)
				keep("$projdir/dta/analysis/qcew/`y'/naics`n'/California_subsample/tr_`c'") replace;
			#delimit cr

		* Load the estimates data
		qui use "$projdir/dta/analysis/qcew/`y'/naics`n'/California_subsample/tr_`c'", clear

		* Get the weights
		* After the renames, county_fips is the unit a set of weights belongs to and
		* cty_fips is the donor county carrying the weight (_W_ abbreviates _W_Weight)
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_) & _W_ > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		qui levelsof county_fips, local(units)
		qui gettoken first : units
		qui save "`core2'", replace

		* For each unit, merge the weights into the core data
		* core4 accumulates the donor rows of all units; the first unit starts it
		* afresh, so nothing left over from an earlier county is appended
		foreach i of local units {
			qui use "`core2'", clear
			qui keep if county_fips == `i'
			qui save "`core3'", replace
			qui use "`core'", clear
			qui keep if naics == `n'
			qui keep cty_fips tm `y' p_`y'
			merge m:1 cty_fips using "`core3'", nogen norep keep(3)
			if `i' != `first' {
				qui append using "`core4'"
			}
			qui save "`core4'", replace
		}

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm p_`y' pop10
		qui rename p_`y' p_`y'_treated
		qui merge 1:1 cty_fips tm using "`core2'", nogen norep
		qui rename tm _time
		qui gen p_gap = p_`y'_treated - p_`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/qcew/`y'/naics`n'/California_subsample/corrected/tr_`c'", replace
	}
}


*---------------------------------------------------------------------------
* NY counties
*---------------------------------------------------------------------------
clear all

* Scratch files used throughout the New York QCEW section
tempfile core core0 core1 core2 core3 core4

* Sample window and treatment quarter, expressed as Stata quarterly dates
* (199 = 2009q4, 216 = 2014q1, 251 = 2022q4)
local start_tm = 199
local end_tm = 251 
local trp = 216
local tot_tm = `end_tm' - `start_tm' + 1

* Text label for the last quarter of the window
if `end_tm' == 251 {
	local endqtrlab "2022Q4"
}
else {
	local endqtrlab "2019Q4"
}

* The user-written -distinct- command is needed below; capture lets the script
* continue when the install step fails or is unnecessary
capture ssc install distinct

* Read the county-level analysis panel
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear

* Give the key variables the names used in the rest of this script
qui rename quarterly_date tm
qui rename earnings demean_lnearn
qui rename lnemp demean_lnemp
qui rename ind naics
format tm %tq

* County identifier built from the state and county codes
qui gen cty_fips = 1000*statefips + countyfips

* Discard rows without an identifier, or with missing or zero wage/employment
qui drop if mi(cty_fips) | mi(avg_wkly_wage) | mi(employment) | avg_wkly_wage == 0 | employment == 0

* Sanity checks: nonzero first, then nonmissing
foreach v in cty_fips avg_wkly_wage employment {
	assert `v'
}
foreach v in avg_wkly_wage employment {
	assert !mi(`v')
}

* Map the older restaurant code 722211 onto 722513 for years up to and including 2011
qui replace naics = 722513 if naics == 722211 & year <= 2011

* Only NAICS 10, 722 and 722513 are used
qui keep if inlist(naics, 10, 722, 722513)

* Order the panel and cut it to the sample window
sort cty_fips tm naics
keep if inrange(tm, `start_tm', `end_tm')

* Identify treated and donor counties
* Treated counties are those with state FIPS 36 (New York); trctylist is the same
* list comma-separated so it can be passed to inlist()
levelsof cty_fips if statefips == 36, local(trcty)
local trctylist = subinstr("`trcty'", " ", ",",.)
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Generate donor variable
* donor = 1 for every county outside the treated list, 0 for treated counties
qui gen donor = !inlist(cty_fips, `trctylist')

* Restrict to donor counties with no min wage changes (and the treated counties)
* A county whose loc_max_mw never varies has max == min
* (presumably loc_max_mw is the binding local minimum wage -- confirm in the build step)
qui egen double max_loc_max = max(loc_max_mw), by(cty_fips)
qui egen double min_loc_max = min(loc_max_mw), by(cty_fips)
* x flags NAICS 722 rows with min_emp of at least 5000; empkeep spreads that flag
* to every row of the county (min_emp is defined upstream and not visible here)
qui gen x = (naics == 722 & min_emp >= 5000)
qui drop if cty_fips == 13121 // Fulton County, GA has an issue in the raw earnings data
bysort cty_fips: egen empkeep = max(x)
* Unlike the California section, every county with state FIPS 36 is kept here
* regardless of the employment-size flag
qui keep if ((max_loc_max == min_loc_max | donor == 0) & empkeep == 1) | floor(cty_fips/1000) == 36
* When the window runs past 239 (2019q4), counties in state FIPS 25 and 51
* (Massachusetts and Virginia) are dropped as well
* NOTE(review): the reason is not stated in this file -- confirm
if `end_tm' > 239 {
	qui drop if inlist(floor(cty_fips/1000), 25, 51)
}

* Save tempfile
qui compress
qui save "`core'", replace

* Make county change adjustments and get mean of avg_wkly_wage over remaining counties
* The adjustment do-file is external and not visible here (presumably it recodes
* cty_fips so that affected counties can be combined -- confirm). The collapse is an
* employment-weighted mean; donor, year and qtr are averaged along with the wage
qui do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
qui collapse (mean) avg_wkly_wage donor year qtr [aw=employment], by(cty_fips tm naics)
* The averaged donor flag must still be exactly 0 or 1 in every cell
assert inlist(donor, 0, 1)

* Save tempfile
qui save "`core2'", replace

* Reload the data
qui use "`core'", clear

* Make county change adjustments and get sum of employment over remaining counties
qui do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
qui collapse (sum) employment pop10, by(cty_fips tm naics)

* Merge
* Combine the summed employment/pop10 with the weighted-mean wage built above
qui merge 1:1 cty_fips tm naics using "`core2'", nogen norep

* Keep only a balanced panel over periods of interest
* ct counts the quarters observed per county-industry; only complete series stay
qui keep if inrange(tm, `start_tm', `end_tm')	
bysort cty_fips naics: gen ct = _N
qui keep if ct == `tot_tm'

* Merge with the LAUS data
* County-year merge, matched rows only (presumably the source of unr, which is
* kept below -- confirm)
qui merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogen norep keep(3)

* Keep donors and larger NY counties
* FIPS list: Albany, Bronx, Kings, New York, Queens, Monroe, Nassau, Onondaga,
* Orange, Suffolk and Westchester
qui keep if donor == 1 | inlist(cty_fips, 36001, 36005, 36047, 36061, 36081, 36055, 36059, 36067, 36071, 36103, 36119)

* Keep only MCD subsample
* keep(3) retains only counties present in both the panel and the subsample file
sort cty_fips
merge m:1 cty_fips using "$projdir/dta/build/cln/mcd_cty_subsample.dta", nogen keep(3)

* Merge with the Covid-impact index
* The panel is parked in core while the index file is reshaped into core2;
* covid_index is the simple average of the workplace and retail variables
qui save "`core'", replace
qui use "$projdir/dta/build/cln/covid_index.dta", clear
qui rename (countyfips) (cty_fips)
qui gen covid_index = (workplace + retail)/2
qui keep cty_fips covid_index
qui save "`core2'", replace
qui use "`core'", clear
merge m:1 cty_fips using "`core2'", nogen norep keep(3)

* Observe overall employment in each period
* emp10 copies the NAICS 10 employment of the county-quarter onto every industry
* row, after which the NAICS 10 rows themselves are dropped
qui gen x = employment if naics == 10
bysort cty_fips tm: egen emp10 = max(x)
qui drop x
qui drop if naics == 10

* Keep variables of interest	
qui keep cty_fips tm naics avg_wkly_wage employment emp10 pop10 unr covid_index donor

* Fast food (NAICS 722513) is the only industry analysed for New York
local n = 722513
qui keep if naics == `n'

* Store the cleaned panel and read it straight back
qui save "`core0'", replace
qui use "`core0'", clear

* Index each series to 100 in quarter 215 (2013q4), county by county
foreach v in employment avg_wkly_wage emp10 {
	qui gen base_obs = `v' if tm == 215
	bysort cty_fips: egen base_lvl = max(base_obs)
	qui replace `v' = 100*`v'/base_lvl
	qui drop base_obs base_lvl
}

* Spread the quarter 199-207 values of each series into county-constant
* variables (one per quarter) and collect their names as regression predictors.
* Each pair below is "source variable" followed by "name stub"
local predvars ""
forval t = 199/207 {
	foreach pair in "employment emp" "avg_wkly_wage earn" "emp10 emp10" {
		gettoken src rest : pair
		gettoken stub : rest
		qui gen qval = `src' if tm == `t'
		bysort cty_fips: egen `stub'_`t' = max(qval)
		qui drop qval
		local predvars "`predvars' `stub'_`t'"
	}
}

* Add the county means over quarters 199-207 as further predictors
foreach v in employment avg_wkly_wage emp10 unr {
	bysort cty_fips: egen pre_avg = mean(`v') if inrange(tm, 199, 207)
	bysort cty_fips: egen mean_`v' = max(pre_avg)
	qui drop pre_avg
	local predvars "`predvars' mean_`v'"
}
save "`core1'", replace

* "Covid- and bias-correct" the outcomes: quarter by quarter, regress the
* outcome on the predictors and the covid index among donor counties
* (weighted by pop10), and store the residual for every county as p_<outcome>
qui levelsof tm, local(quarters)
foreach v in employment avg_wkly_wage {
	qui gen p_`v' = .
	foreach t of local quarters {
		qui reg `v' `predvars' covid_index [aw=pop10] if tm == `t' & donor == 1
		qui predict resid_t, resid
		qui replace p_`v' = resid_t if tm == `t'
		qui drop resid_t
	}
}

* Drop the predictor columns, keeping only what later steps read
qui keep cty_fips tm naics employment avg_wkly_wage p_* emp10 pop10 unr donor

* Tidy up and store as the working panel for the synthetic control step
qui compress
qui sort cty_fips tm
qui save "`core'", replace

* Get the synthetic control weights for each treated county
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)

* For every treated (New York) county: run allsynth4 on fast-food average weekly
* wages against the donor pool, then use the estimated donor weights to rebuild a
* synthetic series for the covid- and bias-corrected outcome and save the gap.
* Outputs: <...>/NY_subsample/tr_<fips>.dta (allsynth4 keep file) and
*          <...>/NY_subsample/corrected/tr_<fips>.dta (corrected gap)
foreach c of local trcty {
	local y = "avg_wkly_wage"

	* Re-load the data
	qui use "`core'", clear

	* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
	qui keep if naics == `n'
	qui keep if cty_fips == `c' | donor == 1
	bysort cty_fips: gen N = _N
	qui keep if N == `tot_tm'
	drop N

	* Report how many donors and treated counties survive the balance check
	qui distinct cty_fips if donor == 1
	local donorct = r(ndistinct)
	qui distinct cty_fips if inlist(cty_fips, `trctylist')
	di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"

	* Create population-based weights for the averaging
	* NOTE(review): x, popwt, treated and trp (below) are not referenced again in
	* this script; they are kept in memory because allsynth4 is not visible here
	qui gen x = pop10
	bysort cty_fips: egen popwt = max(x)

	* Create the list of pre-treatment outcome period predictors:
	* one term per quarter from start_tm to predend for the outcome and for
	* emp10, plus the emp10 average over that whole range
	local predend = 207
	local ypreds
	local emp10preds
	forval t = `start_tm'/`predend' {
		local ypreds "`ypreds' `y'(`t')"
		local emp10preds "`emp10preds' emp10(`t')"
	}
	local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
	local normemp "emp10"

	* tsset
	format tm %tq
	tsset cty_fips tm, quarterly
	qui gen treated = (donor == 0) 
	qui gen trp = `trp'*treated

	#delimit ;
		allsynth4
			`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
			trunit(`c') trperiod(`trp') 
			bcorrect(merge) 
			transform(`y' `normemp', normalize)
			pvalues(rmspe)
			keep("$projdir/dta/analysis/qcew/`y'/naics`n'/NY_subsample/tr_`c'") replace;
		#delimit cr

	* Load the estimates data
	qui use "$projdir/dta/analysis/qcew/`y'/naics`n'/NY_subsample/tr_`c'", clear

	* Get the weights
	* After the renames, county_fips is the unit a set of weights belongs to and
	* cty_fips is the donor county carrying the weight (_W_ abbreviates _W_Weight)
	qui keep cty_fips _Co_Number _W_Weight
	qui keep if !mi(_W_) & _W_ > 0
	qui rename cty_fips county_fips
	qui rename _Co_Number cty_fips
	qui levelsof county_fips, local(units)
	qui gettoken first : units
	qui save "`core2'", replace

	* For each unit, merge the weights into the core data
	* core4 accumulates the donor rows of all units; the first unit starts it
	* afresh, so nothing left over from an earlier county is appended
	foreach i of local units {
		qui use "`core2'", clear
		qui keep if county_fips == `i'
		qui save "`core3'", replace
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm `y' p_`y'
		merge m:1 cty_fips using "`core3'", nogen norep keep(3)
		if `i' != `first' {
			qui append using "`core4'"
		}
		qui save "`core4'", replace
	}

	* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
	qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
	qui rename county_fips cty_fips
	qui save "`core2'", replace

	* Merge into core data for the treated unit, and save
	qui use "`core'", clear
	qui keep if naics == `n'
	qui keep cty_fips tm p_`y' pop10
	qui rename p_`y' p_`y'_treated
	qui merge 1:1 cty_fips tm using "`core2'", nogen norep
	qui rename tm _time
	qui gen p_gap = p_`y'_treated - p_`y'_synthetic
	qui gen trunit = `c'
	qui gen trperiod = `trp'
	qui save "$projdir/dta/analysis/qcew/`y'/naics`n'/NY_subsample/corrected/tr_`c'", replace
}

*---------------------------------------------------------------------------
* Assign weights from SC to counties in MCD sample
*---------------------------------------------------------------------------
clear all

* Create or clear folders
foreach loc in California NY {
	foreach y in price hwage { 
		* Output folder for this outcome and location, plus its "corrected" subfolder
		local outdir "$projdir/dta/analysis/mcd/`y'/`loc'_subsample"

		* Ensure necessary folders exist. mkdir creates one level at a time, and
		* capture ignores the error raised when a folder is already there
		capture mkdir "$projdir/dta/analysis/mcd"
		capture mkdir "$projdir/dta/analysis/mcd/`y'"
		capture mkdir "`outdir'"
		capture mkdir "`outdir'/corrected"

		* Clear the old .dta files (if any) from the output folder and from its
		* "corrected" subfolder, so results from earlier runs cannot be picked up
		di "Erasing previously saved outcome data"
		di "..."
		di
		foreach folder in "`outdir'" "`outdir'/corrected" {
			local filelist: dir "`folder'" files "*.dta"
			di "Filelist:" `filelist'
			foreach f of local filelist {
				qui erase "`folder'/`f'"
			}
		}
	}
}

*---------------------------------------------------------------------------
* California
*---------------------------------------------------------------------------

* Scratch files used throughout the California MCD section
tempfile core core0 core1 core2 core3 core4

* Yearly sample window, its length, and the treatment year written to the output files
local start_tm = 2016
local end_tm = 2022
local tot_tm = `end_tm' - `start_tm' + 1
local trp = 2014
local endyearlab "2022"

* Ensure distinct package is installed
capture ssc install distinct

* Load the data
use "$projdir/dta/build/src/MR_fips_McW.dta", clear

* Generate price
* price is the hourly wage divided by BMPH, and mark_up is the inverse of BMPH
* (presumably BMPH is Big Macs per hour of work -- confirm against the data source)
qui gen price = hwage/BMPH
qui gen mark_up = 1/BMPH

* Rename and keep key variables
qui rename (fips BMPH) (cty_fips wage_price)
keep cty_fips year price hwage mark_up wage_price

* Drop obs which don't observe county fips or key vars
qui keep if !mi(cty_fips) & !mi(price) &!mi(hwage) & price != 0 & hwage != 0
assert cty_fips
assert price
assert hwage
assert !mi(price)
assert !mi(hwage)
assert !mi(year)

* Sort
sort cty_fips year

* Merge pop10
* Build a one-row-per-county lookup of pop10 from the QCEW analysis panel
qui save "`core0'", replace
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips)
assert cty_fips
keep cty_fips pop10
* Collapse identical (cty_fips, pop10) rows to a single row
qui duplicates drop
* The m:1 merge below needs cty_fips to be unique in the lookup; fail here with a
* clear message if a county carries more than one pop10 value
isid cty_fips
sort cty_fips
qui save "`core1'", replace
qui use "`core0'", clear
* keep(1 3): every MCD row is retained, whether or not a pop10 value was found
merge m:1 cty_fips using "`core1'", nogen keep(1 3)

* Split counties into treated (state FIPS 6, California) and donors
qui gen statefips = floor(cty_fips/1000)
levelsof cty_fips if statefips == 6, local(trcty)
local trctylist : subinstr local trcty " " ",", all
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* donor is 0 for treated counties and 1 for all others
qui gen donor = 1
qui replace donor = 0 if inlist(cty_fips, `trctylist')

* Only counties listed in the MCD subsample file are kept
merge m:1 cty_fips using "$projdir/dta/build/cln/mcd_cty_subsample.dta", nogen keep(3)

* Index every series to 100 in 2016, county by county
foreach v in price hwage mark_up wage_price {
	tempvar base_obs base_lvl
	qui gen `base_obs' = `v' if year == 2016
	bysort cty_fips: egen `base_lvl' = max(`base_obs')
	qui replace `v' = 100*`v'/`base_lvl'
	qui drop `base_obs' `base_lvl'
}

* Store the working panel
qui compress
qui save "`core'", replace

* Get the synthetic control weights for each treated county
* No new weights are estimated here: the donor weights saved by the QCEW section
* (tr_<fips>.dta in the California_subsample folder) are re-applied to the MCD
* outcomes price and hwage
qui levelsof cty_fips if donor == 0, local(trcty)

foreach c of local trcty {
	foreach y in price hwage {
		if `c' != 6079 { // San Luis Obispo doesn't have a complete panel for fast food
							
		* Load the estimates data
		* NOTE(review): this assumes a QCEW keep file exists for every treated county
		* in the MCD sample -- confirm, since a missing file stops the run
		qui use "$projdir/dta/analysis/qcew/avg_wkly_wage/naics722513/California_subsample/tr_`c'", clear
						
		* Get the weights and rescale
		* After the renames, county_fips is the unit a set of weights belongs to and
		* cty_fips is the donor county carrying the weight (_W_ abbreviates _W_Weight)
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_) & _W_ > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		qui levelsof county_fips, local(units)
		qui gettoken first : units
		qui save "`core2'", replace
						
		* For each unit, merge the weights into the core data
		* core4 accumulates the donor rows of all units; the first unit starts it
		* afresh, so nothing left over from an earlier pass is appended
		foreach i of local units {
			qui use "`core2'", clear
			qui keep if county_fips == `i'
			qui save "`core3'", replace
			qui use "`core'", clear
			merge m:1 cty_fips using "`core3'", nogen norep keep(3)
			if `i' != `first' {
				qui append using "`core4'"
			}
			qui save "`core4'", replace
		}
		
		* Check that the weights matched into the MCD panel still sum to (nearly) one
		* for every unit and year, i.e. that no weighted donor is missing from the panel
		* NOTE(review): the upper bound is exactly 1, so weights that round to slightly
		* above 1 would trip this assert -- confirm the tolerance is intended
		bysort county_fips year: egen sum_w = sum(_W_Weight)
		assert sum_w >= 0.99 & sum_w <= 1
		drop sum_w

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) `y'_synthetic=`y' [aw=_W_Weight], by(county_fips year)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		* p_gap is the actual series minus its synthetic counterpart
		qui use "`core'", clear
		qui rename `y' `y'_treated
		qui merge 1:1 cty_fips year using "`core2'", nogen norep
		qui rename year _time
		qui gen p_gap = `y'_treated -`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/mcd/`y'/California_subsample/tr_`c'", replace
		}
	}
}


*---------------------------------------------------------------------------
* NY counties
*---------------------------------------------------------------------------
clear all

* Scratch files used throughout the New York MCD section
tempfile core core0 core1 core2 core3 core4

* Yearly sample window, its length, and the treatment year written to the output files
local start_tm = 2016
local end_tm = 2022
local tot_tm = `end_tm' - `start_tm' + 1
local trp = 2014
local endyearlab "2022"


* Ensure distinct package is installed
capture ssc install distinct

* Load the data
use "$projdir/dta/build/src/MR_fips_McW.dta", clear

* Generate price
* price is the hourly wage divided by BMPH, and mark_up is the inverse of BMPH
* (presumably BMPH is Big Macs per hour of work -- confirm against the data source)
qui gen price = hwage/BMPH
qui gen mark_up = 1/BMPH

* Rename and keep key variables
qui rename (fips BMPH) (cty_fips wage_price)
keep cty_fips year price hwage mark_up wage_price

* Drop obs which don't observe county fips or key vars
qui keep if !mi(cty_fips) & !mi(price) &!mi(hwage) & price != 0 & hwage != 0
assert cty_fips
assert price
assert hwage
assert !mi(price)
assert !mi(hwage)
assert !mi(year)

* Sort
sort cty_fips year

* Merge pop10
* Build a one-row-per-county lookup of pop10 from the QCEW analysis panel
qui save "`core0'", replace
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips)
assert cty_fips
keep cty_fips pop10
* Collapse identical (cty_fips, pop10) rows to a single row
qui duplicates drop
* The m:1 merge below needs cty_fips to be unique in the lookup; fail here with a
* clear message if a county carries more than one pop10 value
isid cty_fips
sort cty_fips
qui save "`core1'", replace
qui use "`core0'", clear
* keep(1 3): every MCD row is retained, whether or not a pop10 value was found
merge m:1 cty_fips using "`core1'", nogen keep(1 3)

* Split counties into treated (state FIPS 36, New York) and donors
qui gen statefips = floor(cty_fips/1000)
levelsof cty_fips if statefips == 36, local(trcty)
local trctylist : subinstr local trcty " " ",", all
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* donor is 0 for treated counties and 1 for all others
qui gen donor = 1
qui replace donor = 0 if inlist(cty_fips, `trctylist')

* Only counties listed in the MCD subsample file are kept
merge m:1 cty_fips using "$projdir/dta/build/cln/mcd_cty_subsample.dta", nogen keep(3)

* Index every series to 100 in 2016, county by county
foreach v in price hwage mark_up wage_price {
	tempvar base_obs base_lvl
	qui gen `base_obs' = `v' if year == 2016
	bysort cty_fips: egen `base_lvl' = max(`base_obs')
	qui replace `v' = 100*`v'/`base_lvl'
	qui drop `base_obs' `base_lvl'
}

* Store the working panel
qui compress
qui save "`core'", replace

* Get the synthetic control weights for each treated county
* No new weights are estimated here: the donor weights saved by the QCEW section
* (tr_<fips>.dta in the NY_subsample folder) are re-applied to the MCD outcomes
* price and hwage
qui levelsof cty_fips if donor == 0, local(trcty)
	
foreach c of local trcty {
	foreach y in price hwage {
	
		* Load the estimates data
		* NOTE(review): this assumes a QCEW keep file exists for every treated county
		* in the MCD sample -- confirm, since a missing file stops the run
		qui use "$projdir/dta/analysis/qcew/avg_wkly_wage/naics722513/NY_subsample/tr_`c'", clear
					
		* Get the weights and rescale
		* After the renames, county_fips is the unit a set of weights belongs to and
		* cty_fips is the donor county carrying the weight (_W_ abbreviates _W_Weight)
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_) & _W_ > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		qui levelsof county_fips, local(units)
		qui gettoken first : units
		qui save "`core2'", replace
					
		* For each unit, merge the weights into the core data
		* core4 accumulates the donor rows of all units; the first unit starts it
		* afresh, so nothing left over from an earlier pass is appended
		foreach i of local units {
			qui use "`core2'", clear
			qui keep if county_fips == `i'
			qui save "`core3'", replace
			qui use "`core'", clear
			merge m:1 cty_fips using "`core3'", nogen norep keep(3)
			if `i' != `first' {
				qui append using "`core4'"
			}
			qui save "`core4'", replace
		}
		
		* Check that the weights matched into the MCD panel still sum to (nearly) one
		* for every unit and year, i.e. that no weighted donor is missing from the panel
		* NOTE(review): the upper bound is exactly 1, so weights that round to slightly
		* above 1 would trip this assert -- confirm the tolerance is intended
		bysort county_fips year: egen sum_w = sum(_W_Weight)
		assert sum_w >= 0.99 & sum_w <= 1
		drop sum_w

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) `y'_synthetic=`y' [aw=_W_Weight], by(county_fips year)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		* p_gap is the actual series minus its synthetic counterpart
		qui use "`core'", clear
		qui rename `y' `y'_treated
		qui merge 1:1 cty_fips year using "`core2'", nogen norep
		qui rename year _time
		qui gen p_gap = `y'_treated -`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/mcd/`y'/NY_subsample/tr_`c'", replace
	}
}

* Close the log
log close sublog
